YouTube comment analysis using PrimeText


In [1]:
# Turn on matplotlib's GUI integration; with no argument IPython picks the
# platform's default backend, so figures open in separate windows.
%matplotlib


Using matplotlib backend: MacOSX

In [2]:
import sys
import pandas as pa
import numpy as np
from PrimeText import PrimeText
import matplotlib.pyplot as plt

In [3]:
# Analyzer instance used by the following cells.
pt = PrimeText()

# Load the exported comments; the file is decoded as ISO-8859-1 (Latin-1).
ytData = pa.read_csv(
    "utubeES.csv",
    encoding="ISO-8859-1",
)

# Only the 'comment' column is fed to the analyzer.
comments = ytData["comment"]

In [9]:
# Run PrimeText's preparation steps on the comment column. Later cells read
# pt.indexedDictionary and call pt.countInRecords, so this cell has to run
# before them.
# NOTE(review): the steps presumably build on each other in this order
# (clean -> assemble dictionary -> index dictionary -> index comments);
# confirm against the PrimeText implementation before reordering.
pt.cleanData(comments)
pt.assembleDictionary()
pt.indexDictionary()
pt.indexComments()


Records cleaned : 4513 / 4513
Records checked : 3924
Indexed dictionary
Indexed comments

In [12]:
# For every entry of the indexed dictionary, ask PrimeText for its count over
# the records (presumably the number of comments containing that term --
# confirm against PrimeText.countInRecords).
keyText = list(pt.indexedDictionary.keys())
keyCount = [pt.countInRecords([term]) for term in keyText]

# Counts labelled by their term.
s1 = pa.Series(data=keyCount, index=keyText)

# Largest counts first, truncated to the first 50 entries.
byCountDescending = s1.sort_values(ascending=False)
sortedS1 = byCountDescending[:50]

sortedS1.plot.bar()


Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x117d03ba8>

In [6]:
# Square table with the top terms on both axes, pre-filled with integer zeros;
# each cell is later overwritten with the count for that (row, column) pair.
# The zero is passed straight to the constructor so the columns are numeric
# from the start: an empty frame has object dtype, and newer pandas releases
# no longer downcast object columns in fillna(0), which would leave the table
# unusable for plt.imshow.
df = pa.DataFrame(0, index=sortedS1.index, columns=sortedS1.index)

In [7]:
# Fill the table: the cell at (row, col) receives pt.countInRecords([col, row])
# (presumably the number of records containing both terms -- confirm against
# PrimeText). Every ordered pair is queried, including the diagonal.
names = sortedS1.index
colsdone = 0
for colsdone, col in enumerate(names, start=1):
    # "\r" rewrites the same line; flush so the counter is visible while the
    # loop is still running rather than only once the buffer fills.
    sys.stdout.write("\rCols done : %i" % colsdone)
    sys.stdout.flush()
    for row in names:
        # Single .loc assignment instead of chained df[col][row] = ..., which
        # may write to a temporary copy and is a no-op under Copy-on-Write.
        df.loc[row, col] = pt.countInRecords([col, row])
sys.stdout.write("\n")
sys.stdout.flush()


Cols done : 50

In [11]:
# Heatmap of the table, one cell per (row, column) term pair.
# A dedicated figure/axes is created so the image is never drawn on top of an
# earlier plot that is still the "current" axes under a GUI backend.
fig, ax = plt.subplots()

# Hand imshow a plain float array: it rejects object-dtype input, which is what
# the frame holds if its columns were never converted to a numeric dtype.
imgplot = ax.imshow(df.values.astype(float), interpolation="nearest")

# Label every row and column with its term; x labels are rotated to fit.
ax.set_xticks(range(len(names)))
ax.set_xticklabels(names, rotation=90)
ax.set_yticks(range(len(names)))
ax.set_yticklabels(names, rotation=0)

fig.colorbar(imgplot, ax=ax)
plt.show()

In [ ]:


In [ ]: